Testing primers against PR2
Testing primers against PR2
1 Goal
- Testing primers against the PR2 database (latest version 4.11.1)
2 Notes about script
- Chunks
compute_matchesandstore_matcheshave only to be run in 2 cases- new version of pr2
- new set of primers
- In the other case, they can be set eval=FALSE
2.1 To do
- Check why there are some negative amplicon (probably some matches ahead - Put a condition that amplicon should be >)
3 Initialize
Load the variables common to the different scripts and the necessary libraries
# Libraries for bioinfo ----------------------------------------------------
library("Biostrings")
# Libraries tidyr ---------------------------------------------------------
library("ggplot2")
library("dplyr")
library("readxl")
library("tibble")
library("readr")
library("purrr")
library("forcats")
library("lubridate")
library("stringr")
# Libraries dvutils and pr2database -------------------------------------------------------
if(any(grepl("package:dvutils", search()))) detach("package:dvutils", unload=TRUE)
library("dvutils")
if(any(grepl("package:pr2database", search()))) detach("package:pr2database", unload=TRUE)
library("pr2database")
# Options for knitting the report -------------
library(knitr)
library(rmdformats)
library(kableExtra)
knitr::opts_chunk$set(fig.width=8,
fig.height=6,
eval=TRUE,
cache=TRUE,
echo=TRUE,
prompt=FALSE,
tidy=TRUE,
comment=NA,
message=FALSE,
warning=FALSE)
opts_knit$set(width=90)
options(max.print="500")
options(knitr.kable.NA = '')4 Constants
5 Read primer file
5.1 Compute position on yeast - DO NOT RERUN
# primers <- read_excel(path = "primers_18S.xlsx", sheet ="primers")
# Read from local database
pr2_db <- db_info("pr2_local")
pr2_db_con <- db_connect(pr2_db)
primers <- tbl(pr2_db_con, "pr2_primers") %>% collect()
disconnect <- db_disconnect(pr2_db_con)
ref_seq <- pr2 %>% filter(pr2_accession == "FU970071.1.1799_U")
# Matches the position of the primers on the yeast sequence
# - use map2_dfr to get an data frame on output and not a list
# - use ~ when defining the function
primers_pos <- map2_dfr(primers$sequence,
primers$direction,
~ get_primer_position(.x, ref_seq$sequence, orientation =.y, mismatches = 3))
primers <- bind_cols(primers, primers_pos)
write_tsv(primers, path = "primers_matches.tsv", na = "")5.2 Build the primer set table 1
# primer_sets <- read_excel(path = "primers_18S.xlsx", sheet ="primer_sets")
# primers <- read_excel(path = "primers_18S.xlsx", sheet ="primers")
# Read from local database
pr2_db <- db_info("pr2_local")
pr2_db_con <- db_connect(pr2_db)
primers <- tbl(pr2_db_con, "pr2_primers") %>% collect()
primer_sets_all <- tbl(pr2_db_con, "pr2_primer_sets") %>% collect()
disconnect <- db_disconnect(pr2_db_con)
if(gene_selected == "18S_rRNA") {
gene_regions = c("V4", "V9")
# Just keep the selected primers (V4, V9 etc..)
primer_sets <- primer_sets_all %>%
filter(gene == gene_selected) %>%
filter(gene_region %in% gene_regions) %>%
filter(!(primer_set_id %in% 63:66))
} else{
primer_sets <- primer_sets %>%
filter(specificity == "plastid" | (gene == "18S_rRNA" & specificity == "universal" ))
}
primer_sets <- primer_sets %>%
left_join(select(primers,
primer_id,
fwd_name=name,
fwd_seq=sequence,
fwd_start_yeast= start_yeast,
fwd_end_yeast= end_yeast),
by = c("fwd_id" = "primer_id")) %>%
left_join(select(primers,
primer_id,
rev_name=name,
rev_seq=sequence,
rev_start_yeast= start_yeast,
rev_end_yeast= end_yeast),
by = c("rev_id" = "primer_id")) %>%
mutate(length_yeast = rev_end_yeast - fwd_start_yeast + 1) %>%
select(gene_region, specificity, primer_set_id,primer_set_name, contains("fwd"), contains("rev"),length_yeast, used_in:remark) %>%
arrange(gene_region, fwd_start_yeast)
write_tsv(primer_sets, path = "primers_Table_1.tsv", na = "")
knitr::kable(select(primer_sets,primer_set_id:primer_set_name, fwd_name, rev_name, length_yeast)) %>%
kableExtra::kable_styling()| primer_set_id | primer_set_name | fwd_name | rev_name | length_yeast |
|---|---|---|---|---|
| 40 | Zhan | Uni18SF | Uni18SR | 461 |
| 12 | Geisen | 3NDf | 1132r | 599 |
| 13 | Brate1 | 3NDf | V4_euk_R1 | 465 |
| 14 | Brate2 | 3NDf | V4_euk_R2 | 458 |
| 18 | Parfrey | 515F | 1119r | 598 |
| 33 | Needham | 515F Univ | 926R | 589 |
| 34 | Lambert | 515FY | NSR951 | 391 |
| 35 | UNonMet | EUK581-F | EUK1134-R | 578 |
| 4 | Hugerth2 | 563f | 1132r | 587 |
| 7 | Bass | V4_1f | TAReukREV3 | 417 |
| 8 | StoeckV4 | TAReuk454FWD1 | TAReukREV3 | 417 |
| 16 | PireddaV4 | TAReuk454FWD1 | V4 18S Next.Rev | 417 |
| 19 | Vannini | Claudia Vannini (F) | Claudia Vannini (R) | 439 |
| 36 | StoeckV4 | TAReuk454FWD1 | V4RB | 417 |
| 1 | Hadziavdic1 | F-566 | R-1200 | 635 |
| 15 | Moreno | EUKAF | EUKAR | 410 |
| 17 | Comeau | E572F | E1009R | 438 |
| 22 | Kim | 528F | Nex_18S_0964_R | 419 |
| 26 | Pawlowski | 528F | S12.2R | 445 |
| 25 | Mangot | NSF563 | NSR951 | 380 |
| 2 | Hadziavdic2 | A-528F | R-952 | 379 |
| 39 | Egge | A-528F | PRYM01+7 | 396 |
| 3 | Hugerth1 | 574*f | 1132r | 576 |
| 23 | Venter | 590F | 1300R | 720 |
| 77 | Hugerth5 | 574f | 1132r | 576 |
| 21 | Zimmerman | D512for | D978rev | 437 |
| 41 | Harder | Cerc479F | Cerc750R | 283 |
| 24 | Simon | EK-565F-NGS | EUK1134-R | 527 |
| 5 | Hugerth3 | 616f | 1132r | 534 |
| 6 | Hugerth4 | 616*f | 1132r | 534 |
| 28 | Amaral1 | 1380F | 1510R | 176 |
| 29 | Amaral2 | 1389F | 1510R | 167 |
| 31 | PireddaV9 | 1388F | 1510R | 167 |
| 27 | StoeckV9 | 1391F | EukB | 169 |
6 Computing the matches
This part is done by an R script PR2 Primers pr2_match.R that is executed in batch mode.
6.1 Programing Notes
- Use Biostrings
Accessor methods : In the code snippets below, x is an MIndex object.
- length(x): The number of patterns that matches are stored for.
- names(x): The names of the patterns that matches are stored for.
- startIndex(x): A list containing the starting positions of the matches for each pattern.
- endIndex(x): A list containing the ending positions of the matches for each pattern.
- elementNROWS(x): An integer vector containing the number of matches for each pattern.
- x[[i]]: Extract the matches for the i-th pattern as an IRanges object.
- unlist(x, recursive=TRUE, use.names=TRUE): Return all the matches in a single IRanges object. recursive and use.names are ignored.
- extractAllMatches(subject, mindex): Return all the matches in a single XStringViews object.
One could also use another function which does not give the position * match_fwd <- vcountPattern(fwd, seq,max.mismatch=0, min.mismatch=0, with.indels=FALSE, fixed=FALSE, algorithm=“auto”)
7 Load the data file
This avoids recomputing each time.
All sequences for which introns have been removed are filtered out (contain "_UC" in pr2_accession)
load(file=str_c("pr2_match_",gene_selected,".rda"))
print(str_c("Before filtration: ", nrow(pr2_match_final)))[1] "Before filtration: 5112870"
pr2_match_final_filtered <- pr2_match_final %>%
# Remove sequences for which the introns have been removed
filter(! str_detect(pr2_accession, "_UC")) %>%
# Remove sequence that are shorter
filter((gene_region == "V4" & sequence_length>= sequence_length_min_V4) |
(gene_region == "V9" & sequence_length>= sequence_length_min_V9) |
(! (gene_region %in% c("V4", "V9") & sequence_length>= sequence_length_min)))
print(str_c("After filtration: ", nrow(pr2_match_final_filtered)))[1] "After filtration: 3074625"
8 Summarize the data in tables
8.1 Summarize all eukaryotes
pr2_match_summary_primer_set <- pr2_match_final_filtered %>%
group_by(gene_region, primer_label, primer_set_id ) %>%
summarize (pct_fwd = sum(!is.na(fwd_pos))/n()*100,
pct_rev = sum(!is.na(rev_pos))/n()*100,
pct_amplicons = sum(!is.na(ampli_size))/n()*100,
ampli_size_mean = mean(ampli_size, na.rm=TRUE),
ampli_size_sd = sd(ampli_size, na.rm=TRUE),
ampli_size_max = max(ampli_size, na.rm=TRUE),
ampli_size_min = min(ampli_size, na.rm=TRUE),
n_seq = n())
write_tsv(pr2_match_summary_primer_set, str_c("pr2_match_summary_primer_set_",gene_selected,".tsv"), na="")
# Long form for Number of sequences
# This dataframe is used to re-order the bars correctly
pct_category_order <- data.frame(pct_category = c("pct_amplicons","pct_fwd","pct_rev"), pct_category_order = c(3, 1, 2))
pr2_match_summary_primer_set_long <- pr2_match_summary_primer_set %>%
tidyr::gather("pct_category", "pct_seq", pct_fwd:pct_amplicons) %>%
left_join(pct_category_order)8.2 Summarize per supergroup
pr2_match_summary_primer_set_sg <- pr2_match_final_filtered %>%
group_by(supergroup, gene_region, primer_label, primer_set_id) %>%
summarize (pct_fwd = sum(!is.na(fwd_pos))/n()*100,
pct_rev = sum(!is.na(rev_pos))/n()*100,
pct_amplicons = sum(!is.na(ampli_size))/n()*100,
ampli_size_mean = case_when (pct_amplicons>0 ~ mean(ampli_size, na.rm=TRUE)),
ampli_size_sd = case_when (pct_amplicons>0 ~ sd(ampli_size, na.rm=TRUE)),
ampli_size_max = case_when (pct_amplicons>0 ~ max(ampli_size, na.rm=TRUE)),
ampli_size_min = case_when (pct_amplicons>0 ~ min(ampli_size, na.rm=TRUE)),
n_seq = n()) %>%
ungroup()
write_tsv( pr2_match_summary_primer_set_sg, str_c("pr2_match_summary_primer_set_",gene_selected,"_per_sg.tsv"), na="")8.3 Summarize per class
pr2_match_summary_primer_set_class <- pr2_match_final_filtered %>%
group_by(supergroup, division, class, gene_region, primer_label, primer_set_id) %>%
summarize (pct_fwd = sum(!is.na(fwd_pos))/n()*100,
pct_rev = sum(!is.na(rev_pos))/n()*100,
pct_amplicons = sum(!is.na(ampli_size))/n()*100,
ampli_size_mean = case_when (pct_amplicons>0 ~ mean(ampli_size, na.rm=TRUE)),
ampli_size_sd = case_when (pct_amplicons>0 ~ sd(ampli_size, na.rm=TRUE)),
ampli_size_max = case_when (pct_amplicons>0 ~ max(ampli_size, na.rm=TRUE)),
ampli_size_min = case_when (pct_amplicons>0 ~ min(ampli_size, na.rm=TRUE)),
n_seq = n())
write_tsv(pr2_match_summary_primer_set_class, str_c("pr2_match_summary_primer_set_",gene_selected,"_per_class.tsv"), na="") 9 Amplicon length
9.1 Average size
ggplot(pr2_match_final, aes(x= primer_label, y=ampli_size, group=primer_set_id)) +
geom_boxplot(outlier.alpha = 0.3) +
theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
xlab("Primer set")9.2 Plot an example of amplicon distribution (Fig. 3)
fig3A <- list()
fig3B <- list()
for (one_primer_set in c(4, 29)){
if (one_primer_set == 4 ) {
xmin=570
xmax = 610
xmax2 = 2000
} else {
xmin=130
xmax = 190
xmax2 = 1000
}
pr2_match_final_one <- pr2_match_final %>%
filter(primer_set_id==one_primer_set) %>%
filter( !(supergroup %in% c("Apusozoa", "Eukaryota_X", "Protoalveoalata") ))
primer_label <- str_replace_all(pr2_match_final_one$primer_label[1], "_", " ")
g <- ggplot(pr2_match_final_one, aes(x= ampli_size)) +
geom_density(fill="blue", alpha=0.9) +
xlab("Amplicon size") +
ggtitle(str_c("Primer set - ", primer_label)) +
xlim(xmin,xmax)
print(g)
fig3A[[one_primer_set]] <- ggplot(pr2_match_final_one, aes(x= ampli_size, fill=supergroup)) +
geom_density(alpha=0.9)+
theme_bw() +
theme(legend.position = "none") +
scale_fill_viridis_d(option = "inferno") +
xlab("Amplicon size (bp)") + ylab("Density") +
ggtitle(str_c("Primer set - ", primer_label)) +
xlim(xmin,xmax)
print(fig3A[[one_primer_set]])
fig3B[[one_primer_set]] <- ggplot(pr2_match_final_one, aes(x= supergroup, y=ampli_size)) +
geom_boxplot(outlier.alpha = 0.3)+
theme_bw() +
coord_flip()+
# theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
xlab("Supergroup") + ylab("Amplicon size (bp)") +
ylim(0,xmax2)
print(fig3B[[one_primer_set]])
g <- ggplot(pr2_match_final_one, aes(x= supergroup, y=ampli_size)) +
geom_violin() +
coord_flip()+
# theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
xlab("Supergroup") + ylab("Amplicon size (bp)")
print(g)
}9.3 Plot an example of amplicon size distribution
fig3C <- list()
for (one_primer_set in c(4, 29)) {
pr2_match_summary_filtered <- pr2_match_summary_primer_set_sg %>%
filter((n_seq > 20) &
(primer_set_id == one_primer_set) &
!(supergroup %in% c("Apusozoa", "Eukaryota_X") ))
fig3C[[one_primer_set]] <- ggplot(pr2_match_summary_filtered) +
geom_col(data=pr2_match_summary_filtered,
aes(x=str_c(supergroup, " - n = ", n_seq),
y=pct_amplicons,
fill=supergroup),
position="dodge") +
theme_bw()+
theme(legend.position = "none") +
scale_fill_viridis_d(option = "inferno") +
coord_flip() +
ylab("% of sequences amplified") + xlab("") # +
# ggtitle (str_c("Set - ", one_primer_set, " - % amplified per Supergroup") )
print(fig3C[[one_primer_set]])
g <- ggplot(filter(pr2_match_summary_filtered, !is.nan(ampli_size_mean))) +
geom_point(aes(x=str_c(supergroup, " - n = ", n_seq),
y=ampli_size_mean),
colour="black") +
coord_flip() +
geom_errorbar(aes(x=str_c(supergroup, " - n = ", n_seq),
ymax=ampli_size_mean + ampli_size_sd,
ymin=ampli_size_mean - ampli_size_sd)) +
xlab("Supergroup") + ylab("Amplicon size (bp)") # +
# scale_y_continuous(breaks = (1:8)*200, limits = c(0,1500)) +
# ggtitle (str_c("Set - ", one_primer_set,
# " - Amplicon sizes") ) +
# geom_hline(yintercept = c(450,550) , linetype= 2)
print(g)
}9.4 Fig. 3
# g.fig3A <- ggplotGrob(fig3A) # convert to gtable
# g.fig3B <- ggplotGrob(fig3B) # convert to gtable
#
# fig3A.widths <- g.fig3A$widths[1:3] # extract the first three widths,
# # corresponding to left margin, y lab, and y axis
# fig3B.widths <- g.fig3B$widths[1:3] # same for mpg plot
# max.widths <- grid::unit.pmax(fig3A.widths, fig3B.widths) # calculate maximum widths
# g.fig3A$widths[1:3] <- max.widths # assign max. widths to iris gtable
# g.fig3B$widths[1:3] <- max.widths # assign max widths to mpg gtable
cowplot::plot_grid(fig3A[[4]], fig3B[[4]], fig3C[[4]],fig3A[[29]], fig3B[[29]], fig3C[[29]],
labels = c("A","","", "B","",""), ncol = 3, nrow = 2, align = "h")10 Graphics
10.1 All Eukaryotes
Comments
- Percent of sequences amplified
- Logically, the % of seq amplified is always < min(% of sequences matching forwar, % of sequences matching reverse)
- In general it is the reverse primer that causes problems.
- Some primer sets do not amplify any sequence (11, 19, 20, 21)
- For V9, primer set 30 reverse primer is in the ITS region which is not present in the PR2 sequences, so no amplification.
- Amplicon sizes
- Only 8 V4 primer sets are suitable for Illumina 2x250 (max amplicon size = 450 bp)
- This is if we consider the mean… If we consider the variation around the mean then, only 3 suitable for 2x250
- Five more V4 primer sets are suitable for Illumina 3x250 (max amplicon size = 550 bp)
for (one_region in gene_regions) {
pr2_match_summary_primer_set_region_long <- filter(pr2_match_summary_primer_set_long, gene_region== one_region)
pr2_match_summary_primer_set_region <- filter(pr2_match_summary_primer_set, gene_region== one_region)
pr2_match_region <- filter(pr2_match_final, gene_region == one_region)
g <- ggplot(pr2_match_summary_primer_set_region_long) +
geom_col(aes(x=reorder(primer_label, primer_set_id), y=pct_seq,
fill=fct_reorder(pct_category, pct_category_order)), width=.7, position = "dodge") +
theme_bw() +
xlab("Primer set") + ylab("% of sequences amplified") +
scale_fill_manual(name = "% amplified", values = c("pct_amplicons" = "black", "pct_fwd" = "blue","pct_rev" = "red"),
labels=c( "Primer fwd", "Primer rev", "Amplicons")) +
ggtitle (str_c(one_region, " - Percentage of sequences recovered"))+
theme(axis.text.x = element_text(angle = 45, hjust = 1))
print(g)
g <- ggplot(filter(pr2_match_summary_primer_set_region, !is.nan(ampli_size_mean))) +
geom_point(aes(x=reorder(primer_label, primer_set_id), y=ampli_size_mean), colour="black") +
geom_errorbar(aes(x=reorder(primer_label, primer_set_id),
ymax=ampli_size_mean + ampli_size_sd,
ymin=ampli_size_mean - ampli_size_sd)) +
theme_bw() +
xlab("Primer set") + ylab("Amplicon size (bp)") +
# scale_y_continuous(breaks = (1:8)*200, limits = c(0,1500)) +
ggtitle (str_c(one_region, " - Amplicon size - Lines correspond to limits for Illumina 2x250 and 2x300 respectively") ) +
geom_hline(yintercept = c(450,550), linetype = c(2,3) )+
theme(axis.text.x = element_text(angle = 45, hjust = 1))
print(g)
g <- ggplot(pr2_match_region) +
geom_boxplot(aes(x=reorder(primer_label, primer_set_id), y=ampli_size), colour="black", outlier.alpha = 0.3) +
theme_bw() +
xlab("Primer set") + ylab("Amplicon size (bp)") +
# scale_y_continuous(breaks = (1:8)*200, limits = c(0,1500)) +
ggtitle (str_c(one_region, " - Amplicon size - Lines correspond to limits for Illumina 2x250 and 2x300 respectively") ) +
geom_hline(yintercept = c(450,550), linetype = c(2,3) )+
theme(axis.text.x = element_text(angle = 45, hjust = 1))
print(g)
}10.2 By supergroup
Comments
- Excavata have a very different patterns from the rest of the group. They are not amplified by quite a few primer sets. They have also bigger amplicons
- Some groups exhibit higher variability in amplicon size (e.g Chlorophyta)
for (one_region in gene_regions) {
pr2_match_summary_primer_set_sg_region <- filter(pr2_match_summary_primer_set_sg,
(gene_region == one_region) &(n_seq > 20))
g <- ggplot(pr2_match_summary_primer_set_sg_region) +
geom_col(aes(x=supergroup,
y=pct_amplicons),
fill="grey",
position="dodge") +
theme_bw() +
coord_flip() +
ylab("% of sequences amplified") +
xlab("Supergroup") +
ggtitle (str_c(one_region, " - % amplified per Supergroup") ) +
facet_wrap(~ primer_label, scales ="fixed")
print(g)
g <- ggplot(filter(pr2_match_summary_primer_set_sg_region, !is.nan(ampli_size_mean))) +
geom_point(aes(x=supergroup,
y=ampli_size_mean),
colour="black") +
theme_bw() +
coord_flip() +
geom_errorbar(aes(x=supergroup,
ymax=ampli_size_mean + ampli_size_sd,
ymin=ampli_size_mean - ampli_size_sd)) +
xlab("Supergroup") +
ylab("Amplicon size (bp)") +
ggtitle (str_c(one_region, " - Amplicon size - Lines correspond to limits for Illumina 2x250 and 2x300 respectively") ) +
geom_hline(yintercept = c(450,550) , linetype= 2) +
facet_wrap(~ primer_label, scales = "fixed")
print(g)
}10.3 By class for autotrophs
for (one_region in gene_regions) {
pr2_match_summary_filtered <- filter(pr2_match_summary_primer_set_class,
(n_seq > 20) &
(division %in% c("Haptophyta", "Dinoflagellata",
"Chlorophyta", "Ochrophyta", "Cryptophyta")) &
(gene_region == one_region))
if(nrow(pr2_match_summary_filtered) > 0 ) {
g <- ggplot(pr2_match_summary_filtered) +
geom_col(data=pr2_match_summary_filtered,
aes(x=str_c(str_trunc(division, 3, ellipsis = ""), "-", class),
y=pct_amplicons),
fill="grey", position="dodge") +
theme_bw() +
coord_flip() +
ylab("% of sequences amplified") + xlab("Class") +
ggtitle (str_c("Set -", one_primer_set, " - % amplified per Class") ) +
facet_wrap(~ primer_label, scales ="fixed")
print(g)
g <- ggplot(filter(pr2_match_summary_filtered, !is.nan(ampli_size_mean))) +
geom_point(aes(x=str_c(str_trunc(division, 3, ellipsis = ""), "-", class),
y=ampli_size_mean),
colour="black") +
theme_bw() +
coord_flip() +
geom_errorbar(aes(x=str_c(str_trunc(division, 3, ellipsis = ""), "-", class),
ymax=ampli_size_mean + ampli_size_sd,
ymin=ampli_size_mean - ampli_size_sd)) +
xlab("Class") + ylab("Amplicon size (bp)") +
# scale_y_continuous(breaks = (1:8)*200, limits = c(0,1500)) +
ggtitle (str_c("Set -", one_primer_set,
" - Amplicon size - Lines correspond to limits for Illumina 2x250 and 2x300 respectively") ) +
geom_hline(yintercept = c(450,550) , linetype= 2) +
facet_wrap(~ primer_label, scales ="fixed")
print(g)
}
}11 Specific analyses
11.1 Specific et sets for Opisthokonta
- 35 UnNonMet
- 16 Piredda
- 17 Comeau
for (one_primer_set in c(16, 17, 35)) {
pr2_match_summary_filtered <- filter(pr2_match_summary_primer_set_class,
(n_seq > 20) &
(supergroup %in% c("Opisthokonta")) &
(primer_set_id == one_primer_set))
g <- ggplot(pr2_match_summary_filtered) +
geom_col(data=pr2_match_summary_filtered,
aes(x=str_c(division, "-", class, " - n= ", n_seq),
y=pct_amplicons),
fill="grey", position="dodge") +
theme_bw() +
coord_flip() +
ylab("% of sequences amplified") + xlab("Class") +
ggtitle (str_c("Set -", one_primer_set, " - % amplified per Class") )
print(g)
g <- ggplot(filter(pr2_match_summary_filtered, !is.nan(ampli_size_mean))) +
geom_point(aes(x=str_c(division, "-", class, " - n= ", n_seq),
y=ampli_size_mean),
colour="black") +
coord_flip() +
geom_errorbar(aes(x=str_c(division, "-", class, " - n= ", n_seq),
ymax=ampli_size_mean + ampli_size_sd,
ymin=ampli_size_mean - ampli_size_sd)) +
xlab("Class") + ylab("Amplicon size (bp)") +
# scale_y_continuous(breaks = (1:8)*200, limits = c(0,1500)) +
ggtitle (str_c("Set -", one_primer_set,
" - Amplicon size - Lines correspond to limits for Illumina 2x250 and 2x300 respectively") ) +
geom_hline(yintercept = c(450,550) , linetype= 2)
print(g)
}